library('dplyr')
library('dada2')
library('tidyverse')
library('shiny')
library('phyloseq')
library('Biostrings')
library('tibble')
library('hrbrthemes')
library('microbiomeutilities')
library('ggplot2')
library('microViz')
library('markdown')
library('microbiome')
library('ggtext')
library('patchwork')
library('ggpubr')
library('ggrepel')
library('knitr')
library('tibble')
library('decontam')
library('DESeq2')
library('dendextend')
library('vegan')
library('ggforce')
library('corncob')
threads <- 6
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
After downloading the cpn60 refdb classifier, I added known input sequences to fasta and taxonomy files and saved as: cpn60_v11.seqs.fasta.
# Import taxonomy
qiime tools import --type 'FeatureData[Taxonomy]' --input-format HeaderlessTSVTaxonomyFormat --input-path cpn60_v11_taxonomy_table.txt --output-path cpn60_v11_taxonomy_table.qza
# Import ref seqs
qiime tools import --type 'FeatureData[Sequence]' --input-path cpn60_v11_seqs.fasta --output-path cpn60_v11_seqs.qza
# Train the classifier
qiime feature-classifier fit-classifier-naive-bayes --i-reference-reads cpn60_v11_seqs.qza --i-reference-taxonomy cpn60_v11_taxonomy_table.qza --o-classifier cpn60_classifier_v11.qza
# Trained classifier from paper ref'ed above with known input taxa added is saved as: /202310/cpn60_classifier_v11.qza
# Load in a list of all F reads (only using F reads for hsp60 samples, this is totally fine)
fnFs <- sort(list.files('./euprymna_fastq', pattern='_R1_001.fastq', full.names = TRUE))
# Check file names
fnFs
# Assign file names for the filtered fastq files
sample.names <- c("BacMixA", "BacMixB", "ES114A", "ES114B", "EsApoA", "EsApoB", "kitcon", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B", "SymES114A", "SymES114B", "SymES114VentA", "SymES114VentB", "SymWtA", "SymWtB", "SymWtC", "VfMixA", "VfMixB")
sample.names
euprymna.hsp60.metadata <- read.delim("euprymna_hsp60_metadata.csv", header=TRUE, sep=",")
# Check quality and filter
plotQualityProfile(fnFs[1:2])
# assign file names for filtered fastq files
filtFs <- file.path("euprymna_hsp60_filtered", paste0(sample.names, "_F_filt.fastq.gz"))
names(filtFs) <- sample.names
# trimLeft comes from hsp60 primer length (all = 26 bp)
out <- filterAndTrim(fwd=fnFs, filt=filtFs,
trimLeft=26,
maxN=0,
maxEE=2,
compress=TRUE, verbose=TRUE, multithread=TRUE, rm.phix=TRUE)
head(out)
# make sure a reasonable number of reads were filtered (ideally retains >50k reads/sample-ish)
# Learn the error rates: look at how trimming affects the quality reports
timestamp()
errF <- learnErrors(filtFs, multithread=threads)
saveRDS(errF, file='errF_sampled.rds')
plotFerr <- plotErrors(errF, nominalQ=T)
suppressWarnings(print(plotFerr))
timestamp()
dadaF <- dada(filtFs, err=errF, multithread=threads, pool=FALSE)
dadaF[[1]]
# 296 sequence variants were inferred from 39500 input unique sequences
# Here you would typically merge reads, but I'm only using F reads so no merging is necessary
# Dereplication is done automatically by dada2 when given file names (which I did) so no additional dereplication step is necessary
# Make an ASV table!
seqtab <- makeSequenceTable(dadaF)
class(seqtab) # matrix array
dim(seqtab) # 23 6171
# ^ There were 6171 ASVs identified across 23 samples
# inspect the distribution of sequence lengths
table(nchar(getSequences(seqtab))) # 274 6171
# ^ all 6171 ASVs were 274 bp, which is expected because of the trim parameters used
# remove chimeras
seqtab.nochim.es <- removeBimeraDenovo(seqtab, method = "consensus", multithread = threads, verbose = TRUE)
# identified 110 bimeras out of 6171 input sequences
dim(seqtab.nochim.es) # 23 6061
sum(seqtab.nochim.es)/sum(seqtab) # 0.9692
# ^ After removing chimeric ASVs, we're left with 6061 ASVs (96.92% of ASVs were non-chimeric)
# track how many reads were lost during each step of processing:
getN <- function(x) sum(getUniques(x))
summary_tab <- data.frame(row.names=sample.names,
"DADA2 Input"=out[,1],
"Filtered"=out[,2],
"Denoised"=sapply(dadaF, getN),
"Final Read Count"=rowSums(seqtab.nochim.es),
"Percent Reads Retained"=round(rowSums(seqtab.nochim.es)/out[,1]*100, 1))
# save summary table as a file
write.table(summary_tab, "euprymna_hsp60_reads_tracked.tsv", quote=FALSE, sep="\t", col.names = NA)
# write unique seqs to a .fasta file to import into qiime2
uniquesToFasta(seqtab.nochim.es, fout='euprymna-hsp60-repseqs.fasta', ids=colnames(seqtab.nochim.es))
# write ASV table (seqtab.nochim) to a .txt file to import into QIIME2
write.table(t(seqtab.nochim.es), "euprymna-hsp60-seqtab.txt", sep="\t", row.names=TRUE, col.names=NA, quote=FALSE)
# assign taxonomy in QIIME2 using the pre-trained classifier created on 3/29/2023
| Table 1. Read Processing Summary. | |||||
| Number of total reads per sample retained at each major read processing step. | |||||
| DADA2.Input | Filtered | Denoised | Final.Read.Count | Percent.Reads.Retained | |
|---|---|---|---|---|---|
| BacMixA | 187727 | 144816 | 143560 | 134514 | 71.7 |
| BacMixB | 243557 | 185214 | 183127 | 169495 | 69.6 |
| ES114A | 213942 | 147339 | 146562 | 146562 | 68.5 |
| ES114B | 236500 | 188218 | 186530 | 186530 | 78.9 |
| EsApoA | 105387 | 59908 | 50130 | 47953 | 45.5 |
| EsApoB | 125013 | 74261 | 61726 | 59218 | 47.4 |
| kitcon | 109487 | 34443 | 33242 | 28694 | 26.2 |
| MB13A | 244337 | 195270 | 192878 | 192802 | 78.9 |
| MB15A | 193187 | 151865 | 151043 | 140966 | 73.0 |
| MB15B | 226335 | 181097 | 179617 | 157443 | 69.6 |
| MB211A | 238219 | 187994 | 184194 | 183945 | 77.2 |
| MB211B | 279112 | 220305 | 215882 | 215786 | 77.3 |
| MB212A | 220679 | 173520 | 170165 | 169809 | 76.9 |
| MB212B | 165352 | 127792 | 124365 | 124252 | 75.1 |
| SymES114A | 145488 | 98491 | 90008 | 85945 | 59.1 |
| SymES114B | 194096 | 116828 | 114422 | 108783 | 56.0 |
| SymES114VentA | 219768 | 132066 | 118801 | 112053 | 51.0 |
| SymES114VentB | 211779 | 159358 | 154424 | 153062 | 72.3 |
| SymWtA | 239707 | 155598 | 145987 | 140516 | 58.6 |
| SymWtB | 173729 | 119739 | 109516 | 105946 | 61.0 |
| SymWtC | 269185 | 195194 | 184417 | 180470 | 67.0 |
| VfMixA | 155317 | 123316 | 122424 | 120974 | 77.9 |
| VfMixB | 225861 | 178305 | 177472 | 175113 | 77.5 |
# import unique ASV sequences into QIIME2
qiime tools import --type 'FeatureData[Sequence]' --input-path euprymna-hsp60-repseqs.fasta --output-path euprymna-hsp60-repseqs.qza
# assign taxonomy to ASVs using pre-trained cpn60 classifier
qiime feature-classifier classify-hybrid-vsearch-sklearn \
--i-query euprymna-hsp60-repseqs.qza \
--i-reference-reads cpn60_v11_seqs.qza \
--i-reference-taxonomy cpn60_v11_taxonomy_table.qza \
--i-classifier cpn60_classifier_v11.qza \
--p-no-prefilter \
--p-maxaccepts all \
--p-maxrejects all \
--p-confidence 0.6 \
--o-classification euprymna_hsp60_classified
# export output files back out of QIIME2
qiime tools export \
--input-path euprymna_hsp60_classified.qza \
--output-path euprymna_hsp60_classified
# add a special header for BIOM in the feature-table:
echo -n "#OTU Table" | cat - euprymna-hsp60-seqtab.txt > euprymna-hsp60-biom-table.txt
# convert to BIOM v2.1:
biom convert -i euprymna-hsp60-biom-table.txt -o euprymna-hsp60.biom --table-type="OTU table" --to-hdf5
# open taxonomy tsv and change headers to "#OTUID taxonomy confidence", and save a copy as taxonomy.txt
# now updated table.biom file with that biom-taxonomy info:
biom add-metadata -i euprymna-hsp60.biom -o euprymna-hsp60-wtax.biom --observation-metadata-fp taxonomy.txt --sc-separated taxonomy
# then go import the biom-with-taxonomy in R
# save ASV sequences as a list "asv.seqs"
asv.seqs <- colnames(seqtab.nochim.es)
# format headers to name your ASV sequences in a fasta file
asv.headers <- vector(dim(seqtab.nochim.es)[2], mode="character")
for (i in 1:dim(seqtab.nochim.es)[2]) {
asv.headers[i] <- paste(">ASV", i, sep="")
}
# write out a FASTA of final ASV seqs:
asv.fasta <- c(rbind(asv.headers, asv.seqs))
write(asv.fasta, "euprymna-hsp60-ASVs.fasta")
# write out a count table with ASV-IDs instead of ASV-sequences as names:
asv.tab <- t(seqtab.nochim.es)
row.names(asv.tab) <- sub(">", "", asv.headers)
write.table(asv.tab, "euprymna-hsp60-ASVcounts.tsv", sep="\t", quote=F, col.names=NA)
# import euprymna-hsp60 biom as phyloseq object
es.hsp <- import_biom("euprymna-hsp60-wtax.biom")
taxa_names(es.hsp) <- rownames(asv.tab)
tax_table(es.hsp)[1:5]
otu_table(es.hsp)[1:5]
# rename columns in seqtab.nochim.es with sample name (from filt file name)
rownames <- rownames(seqtab.nochim.es)
#use sub() to rename row names
new.rownames <- sub("_.*$", "", rownames)
#print the new row names
print(new.rownames)
sample_names(es.hsp) <- new.rownames
rownames(seqtab.nochim.es) <- new.rownames
sample_data(es.hsp) <- es.hsp.meta
sample_variables(es.hsp)
ranks <- c("domain", "phylum", "class", "order", "family", "genus", "species")
colnames(tax_table(es.hsp))
colnames(tax_table(es.hsp)) <- ranks
tax_table(es.hsp)[1:5]
#format to best hit, do this before you assign "unclassified" labeling!
es.hsp <- format_to_besthit(es.hsp)
view(tax_table(es.hsp))
Run decontam to identify contaminating sequencing based on frequency and prevalence in negative extraction control samples. These ASVs are then pruned from the dataset.
# identify contaminants based on frequency
# "dnaquant": DNA concentration for each sample, used to normalize frequency of ASVs
contamdf.freq.es.hsp <- isContaminant(es.hsp, method="frequency", conc="dnaquant")
head(contamdf.freq.es.hsp)
table(contamdf.freq.es.hsp$contaminant) # identified 21 contaminants
head(which(contamdf.freq.es.hsp$contaminant))
# identify contaminants based on prevalence in negative extraction controls (NEC)
sample_data(es.hsp)$is.neg <- sample_data(es.hsp)$sample.type == "kitcon"
contamdf.prev.es.hsp <- isContaminant(es.hsp, method="prevalence", neg="is.neg")
table(contamdf.prev.es.hsp$contaminant) # identified 7 contaminants
head(which(contamdf.prev.es.hsp$contaminant))
x <- which(contamdf.prev.es.hsp$contaminant)
y <- which(contamdf.freq.es.hsp$contaminant)
# prune contaminant ASVs identified by decontam
contam.asvs.es.hsp <- c(x,y)
contam.asvs.es.hsp
contam.asvs.es.hsp[1:28]=paste0("ASV", contam.asvs.es.hsp[1:28])
contam.asvs.es.hsp
pop.taxa = function(es.hsp, contam.asvs.es.hsp){
allTaxa = taxa_names(es.hsp)
allTaxa <- allTaxa[!(allTaxa %in% contam.asvs.es.hsp)]
return(prune_taxa(allTaxa, es.hsp))
}
es.hsp = pop.taxa(es.hsp, contam.asvs.es.hsp)
es.hsp # 6033 taxa and 23 samples
# ^ 6061 non-chimeric ASVs - 28 contaminating ASVs = 6033 ASVs
# add reads per sample to phyloseq object metadata
sample_data(es.hsp)$reads.sample <- readcount(es.hsp)
sample_data(es.hsp)
euprymna.hsp60.metadata <- read.csv("es-hsp metadata.csv")
euprymna.hsp60.metadata <- column_to_rownames(euprymna.hsp60.metadata, var="X")
sample_data(es.hsp) <- euprymna.hsp60.metadata
sample_data(es.hsp)
es.hsp <- format_to_besthit(es.hsp)
# ^ This appends the best taxonomic hit (e.g. species name) to the ASV # ID
es.hsp.tx <- as.data.frame(tax_table(es.hsp))
colnames(es.hsp.tx) = c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species", "best_hit")
head(es.hsp.tx)
head(tax_table(es.hsp))
# ^ Just adding the appropriate column headers to our tax table
es.hsp.tx$Family[es.hsp.tx$Phylum == "k__Bacteria"] <- "Unidentified Bacteria"
es.hsp.tx$Family[es.hsp.tx$Domain == "Unassigned"] <- "Unassigned at Kingdom Level"
unique(es.hsp.tx$Family)
# ^ Any ASVs that were not assigned taxonomy past the Phylum level will be labeled "k__Bacteria" at all other taxonomic levels, and ASVs that were not assigned taxonomy past the Domain level will be labeled "Unassigned" at all other taxonomic levels. This is just renaming those labels as "Unidentified Bacteria" and "Unassigned at Kingdom Level", respectively.
es.hsp.tx <- as.matrix(es.hsp.tx)
tax_table(es.hsp) <- es.hsp.tx
tax_table(es.hsp)[1:5,1:5]
# ^ Adding these updated taxonomic labels to our phyloseq object
phyloseq::sample_names(es.hsp)
es.sample.order <- c("kitcon", "ES114A", "ES114B", "VfMixA", "VfMixB", "BacMixA", "BacMixB", "EsApoA", "EsApoB", "SymES114VentA", "SymES114VentB", "SymES114A", "SymES114B", "SymWtA", "SymWtB", "SymWtC", "MB13A", "MB15A", "MB15B", "MB211A", "MB211B", "MB212A", "MB212B")
figure2_pal <- c("Aliivibrio fischeri ES114" = "#734f5a",
"Aliivibrio fischeri MB13B1" = "#264653",
"Aliivibrio fischeri MB13B2" = "#2a9d8f",
"Rossellomorea aquimaris TF-12" = "#f4a261",
"Pseudoalteromonas luteoviolacea HI1" = "#e76f51",
"Vibrio litoralis DSM17657" = "#941c2f",
"Other Species" = "#d5bdaf")
tax_palette_plot(figure2_pal)
# Subset only Bacterial Culture samples
es.hsp.culture <- es.hsp %>%
subset_samples(sample.type == "bacterial culture")
# Extract ASV data (abundance counts)
culture.asv.data <- otu_table(es.hsp.culture)
# Extract taxonomy data
culture.tax.data <- tax_table(es.hsp.culture)
# Convert ASV data to data frame and filter out ASVs not present in any culture samples
df.culture.asv <- as.data.frame(culture.asv.data)
# Sum the counts for each ASV and filter out those with zero counts across all samples
asv_sums <- rowSums(df.culture.asv)
df.culture.asv <- df.culture.asv[asv_sums > 0, ]
# Make sure to filter the taxonomy data to match the filtered ASV data
df.culture.tax <- as.data.frame(culture.tax.data)
df.culture.tax <- df.culture.tax[asv_sums > 0, ]
# Convert back to otu_table and tax_table
filtered_culture.asv.data <- otu_table(as.matrix(df.culture.asv), taxa_are_rows = TRUE)
filtered_culture.tax.data <- tax_table(as.matrix(df.culture.tax))
# Recreate the phyloseq object with the filtered data
es.hsp.culture.filtered <- phyloseq(filtered_culture.asv.data, filtered_culture.tax.data, sample_data(es.hsp.culture))
# Check the result
print(es.hsp.culture.filtered)
# Subset culture samples
sample_data(es.hsp.culture)$treatment <- factor(sample_data(es.hsp.culture)$treatment, levels = c("ES114 culture", "mixed V. fischeri culture", "mixed bacteria culture"))
fig1a <- es.hsp.culture %>%
tax_transform("compositional", chain=TRUE)
fig1a <- comp_barplot(fig1a,
"Species",
n_taxa = 6,
tax_order=c("Aliivibrio fischeri ES114",
"Aliivibrio fischeri MB13B1",
"Aliivibrio fischeri MB13B2",
"Rossellomorea aquimaris TF-12",
"Pseudoalteromonas luteoviolacea HI1",
"Vibrio litoralis DSM17657"),
merge_other=FALSE,
bar_width = 0.975,
other_name = "Other Species",
palette = figure2_pal
) +
facet_col(~ treatment, scales="free_y") +
labs(y = "Relative Abundance",
title = "Observed Strains",
fill = "Input Strains") +
scale_y_percent(limits = c(0, 1.05)) +
theme_biome_utils() +
theme(legend.text = element_text(face = "italic", size = 8),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 9),
axis.title.x = element_text(size = 9),
strip.text = element_text(hjust = 0.5, face = "bold"),
plot.title = element_text(size = 10, hjust = 0, face = "bold"),
legend.title = element_text(size = 10),
legend.position = "left",
strip.background = element_blank(),
panel.border = element_blank(),
legend.key.width = unit(0.3, "cm"),
legend.key.height = unit(0.3, "cm")) +
coord_flip()
pdf("fig1a.pdf", width=7, height=7)
fig1a +
plot_layout(heights = c(2, 2, 2), ncol = 1) +
theme(plot.margin = unit(c(1, 1, 0, 1), "cm")) # Top, Right, Bottom, Left
dev.off()
library(msa)
library(ggtree)
library(phangorn)
library(ape)
vfhsp60 <- readDNAStringSet("hsp60_ncbi_vf.fasta")
names(vfhsp60) <- make.unique(names(vfhsp60))
# ^ This should already be true, but just making sure all sequences have unique names
# Perform multiple sequence alignment using ClustalW (you can choose other tools like ClustalOmega or MUSCLE)
alignment <- msa(vfhsp60, method = "ClustalW")
# Convert the alignment to a DNAStringSet object
aligned_seqs <- as(alignment, "DNAStringSet")
# Convert the DNAStringSet to a matrix suitable for phylogenetic analysis
alignment_matrix <- as.matrix(aligned_seqs)
# Convert the alignment matrix to a phyDat object
alignment_phyDat <- phyDat(alignment_matrix, type = "DNA")
# Create a distance matrix
dist_matrix <- dist.ml(alignment_phyDat)
pdf("fig1b.pdf", height = 20, width = 5)
tree
dev.off()
# Look at current naming scheme
head(tax_table(es.hsp)[, "Species"])
# Function to remove strain names from taxa names
remove_strain_names <- function(taxa_name) {
# Use gsub to remove anything after the second part of the name
gsub("(\\w+\\s\\w+).*", "\\1", taxa_name)
}
# Duplicate es.hsp phyloseq object so the original naming scheme still exists
es.hsp.2 <- es.hsp
# Apply the function to all taxa names in the phyloseq object
tax_table(es.hsp.2) <- apply(tax_table(es.hsp.2), 2, function(x) remove_strain_names(x))
# Example to check if it worked (assuming tax_table has "Species" as a column)
head(tax_table(es.hsp.2)[, "Species"])
# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.2)
tax_data <- tax_table(es.hsp.2)
# Identify which taxa belong to "Aliivibrio fischeri"
fischeri_indices <- which(tax_data[, "Species"] == "Aliivibrio fischeri")
# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)
# Calculate the abundance of Aliivibrio fischeri in each sample
fischeri_abundance <- colSums(otu_data[fischeri_indices, ])
# Calculate as a percentage
fischeri_percentage <- (fischeri_abundance / total_abundance) * 100
# Access the sample_data of the phyloseq object
es.hsp.meta <- sample_data(es.hsp.2)
# Add the fischeri_percentage as a new column
es.hsp.meta$fischeri_percentage <- fischeri_percentage
# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.2) <- es.hsp.meta
head(sample_data(es.hsp.2))
# Extract the fischeri_percentage from the sample_data
fischeri_percent_data <- sample_data(es.hsp.2)$fischeri_percentage
# Create a new label in the es.hsp sample data that concatenates the sample name and the Aliivibrio fischeri percentage (rather than trying to put the Aliivibrio fischeri percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.2), sprintf(" (%.2f%%)", fischeri_percent_data))
sample_data(es.hsp.2)$FischeriLabel <- paste(sample_data(es.hsp.2)$SAMPLE, new_labels)
phyloseq::sample_names(es.hsp.2)
## [1] "BacMixA" "BacMixB" "ES114A" "ES114B"
## [5] "EsApoA" "EsApoB" "kitcon" "MB13A"
## [9] "MB15A" "MB15B" "MB211A" "MB211B"
## [13] "MB212A" "MB212B" "SymES114A" "SymES114B"
## [17] "SymES114VentA" "SymES114VentB" "SymWtA" "SymWtB"
## [21] "SymWtC" "VfMixA" "VfMixB"
fig2a_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentB", "SymES114VentA", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")
fig2a_palette <- c("Aliivibrio fischeri" = "#104911",
"Vibrio (Genus)" = "lightblue",
"Other Taxa" = "#d5bdaf")
tax_palette_plot(fig2a_palette)
# Use es.hsp.2 for species-level taxa
# Subset sample types of interest: juvenile squid, juvenile squid ventate, adult squid
es.hsp_filtered <- subset_samples(es.hsp.2, sample.type %in% c("juvenile squid", "juvenile squid ventate", "adult squid"))
# Perform compositional transformation (relative abundance)
es.hsp_filtered <- es.hsp_filtered %>%
tax_transform("compositional", chain = TRUE)
# Ensure the tax_table is in the correct format and modify it to label all species within the "Vibrio" genus
tax_table_temp <- as.data.frame(tax_table(es.hsp_filtered))
tax_table_temp$Species <- ifelse(tax_table_temp$Genus == "Vibrio", "Vibrio (Genus)", tax_table_temp$Species)
tax_table(es.hsp_filtered) <- tax_table(as.matrix(tax_table_temp))
# Create the bar plot with n_taxa = 2 for "Aliivibrio fischeri" and the "Vibrio" genus
fig2a <- comp_barplot(es.hsp_filtered, "Species",
n_taxa = 2, # Only highlight 2 taxa
label = "FischeriLabel", # sample name + percent Vf
sample_order = fig2a_order,
merge_other = TRUE, # Merge other taxa into "Other Species"
bar_width = 0.975,
palette = fig2a_palette,
group_by = "treatment",
other_name = "Other Taxa",
tax_order = c("Aliivibrio fischeri", "Vibrio (Genus)")
)
# Wrap Plots
fig2a <- patchwork::wrap_plots(fig2a, ncol = 1, guides = "collect", scale = "free_x") &
labs(y = "Relative Abundance") &
scale_y_percent(limits = c(0, 1.05)) &
theme_biome_utils() &
theme(
legend.text = element_text(face = "italic", size = 9),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.title.y = element_text(size = 9),
axis.title.x = element_text(size = 8),
strip.text = element_text(hjust = 0.5),
legend.key.width = unit(0.3, "cm"),
legend.key.height = unit(0.3, "cm"),
plot.title = element_text(size = 10, hjust = 0, face = "bold"
),
legend.title = element_blank(),
legend.position = "bottom",
panel.border = element_blank(),
legend.box.margin = margin(0, 2, 0, 0, "cm")
) &
coord_flip()
fig2a <- fig2a + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
pdf("fig2a.pdf", height = 6, width = 5)
par(mar=c(9,5,1,2), cex=1)
fig2a + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()
fig2b_order <- c("EsApoB", "EsApoA", "SymWtC", "SymWtB", "SymWtA", "SymES114B", "SymES114A", "SymES114VentA", "SymES114VentB", "MB212B", "MB212A", "MB211B", "MB211A", "MB15B", "MB15A", "MB13A")
fig2b_palette <- c("Vibrionaceae" = "lightblue",
"Bradyrhizobiaceae" = "#9c8bb3",
"Methylobacteriaceae" = "#604f78",
"Rhizobiaceae" = "darkseagreen2",
"Rhodobacteraceae" = "darkseagreen3",
"o__Rhodobacterales" = "darkseagreen4",
"c__Proteobacteria" = "indianred1",
"c__Alphaproteobacteria" = "indianred3",
"Alteromonadaceae" = "lightgoldenrod1",
"c__Gammaproteobacteria" = "lightgoldenrod3",
"c__Actinomycetia" = "darkslategray4",
"c__Actinobacteria" = "darkslategray",
"Other Taxa" = "#d5bdaf",
"Unidentified Bacteria" = "blanchedalmond")
tax_palette_plot(fig2b_palette)
fig2b_taxorder <- c("Vibrionaceae",
"Bradyrhizobiaceae",
"Methylobacteriaceae",
"Rhizobiaceae",
"Rhodobacteraceae",
"o__Rhodobacterales",
"c__Proteobacteria",
"c__Alphaproteobacteria",
"Alteromonadaceae",
"c__Gammaproteobacteria",
"c__Actinomycetia",
"c__Actinobacteria",
"Other Taxa",
"Unidentified Bacteria")
es.hsp.fig2b <- es.hsp.2
# Extract the OTU and taxonomy tables
otu_data <- otu_table(es.hsp.fig2b)
tax_data <- tax_table(es.hsp.fig2b)
# Identify which taxa belong to "Aliivibrio fischeri"
nonfischeri_indices <- which(tax_data[, "Genus"] != "Aliivibrio")
# Calulate the total abundance for each sample
total_abundance <- colSums(otu_data)
# Calculate the abundance of "fischeri" in each sample
nonfischeri_abundance <- colSums(otu_data[nonfischeri_indices, ])
# Calculate as a percentage
nonfischeri_percentage <- (nonfischeri_abundance / total_abundance) * 100
# Access the sample_data of the phyloseq object
es.hsp.fig2b.meta <- sample_data(es.hsp.fig2b)
# Add the fischeri_percentage as a new column
es.hsp.fig2b.meta$nonfischeri_percentage <- nonfischeri_percentage
# Replace the sample_data in the phyloseq object with the modified sample_data
sample_data(es.hsp.fig2b) <- es.hsp.fig2b.meta
head(sample_data(es.hsp.fig2b))
## sample.type treatment squid.id dnaquant color
## BacMixA bacterial culture mixed bacteria culture culture 0.374 #D4BC4D
## BacMixB bacterial culture mixed bacteria culture culture 0.266 #D4BC4D
## ES114A bacterial culture ES114 culture culture 0.556 #138808
## ES114B bacterial culture ES114 culture culture 0.300 #138808
## EsApoA juvenile squid apo juvenile apo juvenile 1.020 #536878
## EsApoB juvenile squid apo juvenile apo juvenile 2.080 #536878
## is.neg reads.sample vibrionaceae_percentage VibrionaceaeLabel
## BacMixA FALSE 127601 94.8127366 BacMixA (94.81%)
## BacMixB FALSE 156985 94.4943784 BacMixB (94.49%)
## ES114A FALSE 146490 99.2825449 ES114A (99.28%)
## ES114B FALSE 186324 98.8423392 ES114B (98.84%)
## EsApoA FALSE 47636 0.2540096 EsApoA (0.25%)
## EsApoB FALSE 58711 1.2757405 EsApoB (1.28%)
## fischeri_percentage FischeriLabel nonfischeri_percentage
## BacMixA 80.41394660 BacMixA (80.41%) 19.5860534
## BacMixB 74.69694557 BacMixB (74.70%) 25.3030544
## ES114A 99.28254488 ES114A (99.28%) 0.7174551
## ES114B 98.83965565 ES114B (98.84%) 1.1576608
## EsApoA 0.09446637 EsApoA (0.09%) 99.9055336
## EsApoB 0.22823662 EsApoB (0.23%) 99.7717634
# Extract the fischeri_percentage from the sample_data
nonfischeri_percent_data <- sample_data(es.hsp.fig2b)$nonfischeri_percentage
# Create a new label in the es.hsp.fig2b sample data that concatenates the sample name and the non-Vibrio fischeri percentage (rather than trying to put the non-Vibrio fischeri percentage on a different axis, much more complicated)
new_labels <- paste(sample_names(es.hsp.fig2b), sprintf(" (%.2f%%)", nonfischeri_percent_data))
sample_data(es.hsp.fig2b)$NonFischeriLabel <- paste(sample_data(es.hsp.fig2b)$SAMPLE, new_labels)
pdf("fig2b.pdf", height = 7, width = 5)
par(mar=c(9,5,1,2), cex=1)
fig2b + plot_layout(heights = c(2, 3, 2, 2, 7), ncol = 1)
dev.off()